library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## âś” ggplot2 3.0.0 âś” purrr 0.2.5
## âś” tibble 1.4.2 âś” dplyr 0.7.6
## âś” tidyr 0.8.1 âś” stringr 1.3.1
## âś” readr 1.1.1 âś” forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag() masks stats::lag()
# devtools::install_github("privefl/bigsnpr")
library(bigsnpr)
## Loading required package: bigstatsr
# devtools::install_github("privefl/paper2-PRS/pkg.paper.PRS")
library(pkg.paper.PRS) ## See code source for functions' code.
options(nboot = 1e5)
Useful functions
COLORS <- scales::hue_pal()(6)
methods.color <- setNames(
c(COLORS, COLORS[c(4, 2, 5)], "black"),
c("logit-simple", "PRS-max", "PRS-stringent", "T-Trees", "logit-triple", "PRS-all",
"PRS-max-0.05", "PRS-max-0.2", "PRS-max-0.8", "biglasso")
)
myggplot <- function(..., coeff = 1) {
ggplot(...) +
bigstatsr::theme_bigstatsr(size.rel = coeff) +
theme(strip.text.x = element_text(size = rel(2)),
strip.text.y = element_text(size = rel(2)))
}
barplot_causal <- function(results) {
results %>%
myggplot(aes(par.causal, AUC_mean, fill = method, color = method)) +
geom_hline(yintercept = 0.5, linetype = 2) +
geom_col(position = position_dodge(), alpha = 0.5, color = "black", size = 1) +
geom_errorbar(aes(ymin = AUC_mean - 2 * AUC_boot, ymax = AUC_mean + 2 * AUC_boot),
position = position_dodge(width = 0.9), color = "black", width = 0.2, size = 1) +
scale_y_continuous(limits = c(0.5, NA), minor_breaks = 0:20 / 20,
oob = scales::rescale_none) +
labs(x = "Causal SNPs (number and location)", y = "Mean of 100 AUCs",
fill = "Method", color = "Method") +
scale_fill_manual(values = methods.color) +
scale_color_manual(values = methods.color)
}
barplot_causal_one <- function(results, h2) {
auc_max <- `if`(h2 == 0.8, 0.94, 0.84)
results %>%
filter(par.h2 == h2) %>%
barplot_causal() +
geom_hline(yintercept = auc_max, linetype = 3, color = "blue") +
facet_grid(par.dist ~ .)
}
barplot_causal_all <- function(results) {
results %>%
barplot_causal() +
geom_hline(aes(yintercept = auc_max), linetype = 3, color = "blue",
data = data.frame(par.h2 = c(0.5, 0.8), auc_max = c(0.84, 0.94))) +
facet_grid(par.dist ~ par.h2)
}
compare_logit <- function(results, with_method) {
results <- filter(
results, method %in% c("logit-simple", with_method), par.h2 == 0.8)
results.summary <- results %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n())
nsimu <- results.summary$N[[1]]
# stopifnot(all(results.summary$N == nsimu))
plot_extra <- function(y, ylab = y) {
results %>%
myggplot() +
geom_boxplot(aes_string("par.causal", y, color = "method",
fill = "method"), alpha = 0.3) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_grid(par.model ~ par.dist) +
labs(x = "Causal SNPs (number and location)", y = ylab,
fill = "Method", color = "Method") +
theme(legend.position = "none") +
scale_fill_manual(values = methods.color) +
scale_color_manual(values = methods.color) +
scale_y_continuous(limits = c(0, NA))
}
p1 <- results.summary %>%
barplot_causal_one(h2 = 0.8) +
facet_grid(par.model ~ par.dist) +
labs(y = sprintf("Mean of %d AUCs", nsimu))
p2 <- cowplot::plot_grid(
plot_extra("nb.preds", "Number of predictors"),
plot_extra("timing", "Execution time (in seconds)"),
ncol = 1, align = "hv", scale = 0.95,
labels = LETTERS[2:3], label_size = 20
)
p12 <- cowplot::plot_grid(
p1 + theme(legend.position = "none"), p2,
ncol = 2, scale = 0.95, rel_widths = c(6, 4),
labels = c("A", ""), label_size = 20
)
cowplot::plot_grid(
cowplot::get_legend(p1 + theme(legend.direction = "horizontal")), p12,
rel_heights = c(0.1, 1), ncol = 1
)
}
G <- snp_attach("backingfiles/celiacQC_sub1.rds")$genotypes
corr <- snp_cor(G, size = 100)
saveRDS(corr, "backingfiles/corr2.rds")
corr <- readRDS("backingfiles/corr2.rds")
Results with T-Trees
results1 <- list.files("results1", full.names = TRUE) %>%
read_format_results(corr)
compare_logit(results1, with_method = "T-Trees")

ggsave("figures/supp-ttrees.pdf", scale = 1/100, width = 1400, height = 1060)
Results with logit-triple & model fancy
results2 <- list.files("results2", full.names = TRUE) %>%
read_format_results(corr)
Results with logit-triple
compare_logit(results2, with_method = "logit-triple")

ggsave("figures/supp-triple.pdf", scale = 1/100, width = 1400, height = 1060)
results2 %>%
filter(grepl("logit", method)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n()) %>%
barplot_causal_one(h2 = 0.5) +
facet_grid(par.model ~ par.dist)

ggsave("figures/supp-AUC-triple.pdf", scale = 1/110, width = 1242, height = 951)
Results with model fancy
results2 %>%
filter(par.model == "fancy", method %in% c("logit-simple", "PRS-max")) %>%
filter(thr.r2 == 0.2 | is.na(thr.r2)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n()) %>%
barplot_causal_all()

ggsave("figures/supp-AUC-logit-fancy.pdf", scale = 1/110, width = 1242, height = 951)
results2 %>%
filter(par.model == "fancy", grepl("PRS", method)) %>%
filter(thr.r2 == 0.2 | is.na(thr.r2)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n()) %>%
barplot_causal_all()

ggsave("figures/supp-AUC-PRS-fancy.pdf", scale = 1/110, width = 1242, height = 951)